#!/bin/sh

# Attempt to guess suitable flags for the Fortran compiler.

###########################################################################################################################
###########################################################################################################################
###########################################################################################################################

# if you want to compile with OpenMP, add this to the flags listed below, depending on the compiler you use:
#
#   -fopenmp for GNU gfortran
#   -qopenmp for Intel ifort (-openmp for versions older than 15.0)
#   -mp for Portland pgfortran
#   -qsmp=omp for IBM xlf

###########################################################################################################################
###########################################################################################################################
###########################################################################################################################

# You can add -DFORCE_VECTORIZATION to the options below (for all compilers, except for IBM for which the syntax is -WF,-DFORCE_VECTORIZATION)
# to speed up the code in the fluid (acoustic) parts (only; FORCE_VECTORIZATION support for elastic parts has been discontinued in the source code).
# This works fine if (and only if) your computer always allocates a contiguous memory block for each allocatable array;
# this is the case for most machines and most compilers, but not all. For more details see https://github.com/geodynamics/specfem3d/issues/81 .
# To check if that option works fine on your machine, run the code with and without it for a model containing a significant fluid layer
# (or entirely fluid) and make sure the seismograms are identical.

# you can add -DUSE_SERIAL_CASCADE_FOR_IOs to the compiler options to make the mesher output mesh data
# to the disk for one MPI slice after the other, and to make the solver do the same thing when reading the files back from disk.


# Work out which compiler really sits behind $FC and store it in my_FC.
# The Cray Programming Environment exposes every back end through the
# "ftn" wrapper, yet the command-line options remain those of the wrapped
# compiler, so the wrapper is translated using $PE_ENV.
# Anything else -- including "ftn" with an unrecognized $PE_ENV, for which
# no default flags will be selected -- is passed through unchanged.
my_FC="$FC"
case "$FC" in
    */ftn | ftn)
        case "$PE_ENV" in
            CRAY)      my_FC=crayftn ;;
            GNU)       my_FC=gfortran ;;
            INTEL)     my_FC=ifort ;;
            PATHSCALE) my_FC=pathf90 ;;
            PGI)       my_FC=pgfortran ;;
        esac
        ;;
esac

# Select the flag sets for the compiler named by $my_FC:
#   DEF_FFLAGS    always used
#   OPT_FFLAGS    appended for optimized builds
#   DEBUG_FFLAGS  appended for debug builds
# Some arms additionally provide DEF_MPIFC / DEF_MPILIBS.
# A compiler that matches no arm leaves every variable untouched.
case "$my_FC" in
    */crayftn | crayftn | */ftn | ftn)
        # ---- Cray Fortran ----
        # (-Oaggress -Oipa4 would make the optimization even more aggressive)
        DEBUG_FFLAGS='-eC -eD -ec -en -eI -ea -g -G0'
        OPT_FFLAGS='-O3 -Onoaggress -Oipa0 -hfp2 -Ovector3 -Oscalar3 -Ocache2 -Ounroll2 -Ofusion2'
        DEF_FFLAGS='-M 1193 -M 1438'
        ;;
    */pgfortran | pgfortran | */pgf90 | pgf90 | */pgf95 | pgf95)
        # ---- Portland PGI ----
        DEBUG_FFLAGS='-Mbounds'
        OPT_FFLAGS='-Mnobounds -fast'
        DEF_FFLAGS='-Mdclchk -Minform=warn -mcmodel=medium'
        ;;
    */ifort | ifort)
        # ---- Intel ifort Fortran90 for Linux ----
        # Reference:
        # http://software.intel.com/sites/products/documentation/hpc/compilerpro/en-us/fortran/lin/compiler_f/index.htm
        #
        # "-assume buffered_io" matters a lot on parallel file systems such as
        # SFS 3.2 / Lustre 1.8: without it I/O throughput lingers at about
        # 2.5 MB/s, with it throughput can reach ~44 MB/s. It makes little
        # difference on NFS-mounted volumes or with SFS 3.1.1 / Lustre 1.6.7.1.
        #
        # Optimization report: "-vec-report0" is the old spelling; for
        # versions >= 15.0 it is replaced by "-qopt-report0 -qopt-report-phase=vec".
        #
        # Possible additions to DEF_FFLAGS: -mcmodel=medium -shared-intel
        DEBUG_FFLAGS='-check all -debug -g -O0 -fp-stack-check -traceback -ftrapuv'
        OPT_FFLAGS='-O3 -check nobounds'
        DEF_FFLAGS='-xHost -fpe0 -ftz -assume buffered_io -assume byterecl -align sequence -std03 -diag-disable 6477 -implicitnone -gen-interfaces -warn all'
        ;;
    */gfortran | gfortran | */f95 | f95)
        # ---- GNU gfortran ----
        # Optimization report: "-fopt-info", or the very verbose "-fopt-info-all".
        # -Wconversion helps to track accuracy lost through automatic double to
        # single precision conversion (it may generate many warnings...).
        # Possible addition to DEF_FFLAGS: -mcmodel=medium
        DEBUG_FFLAGS='-g -O0 -ggdb -fbacktrace -fbounds-check'
        OPT_FFLAGS='-O3 -finline-functions'
        DEF_FFLAGS='-std=f2003 -fimplicit-none -frange-check -fmax-errors=10 -pedantic -pedantic-errors -Waliasing -Wampersand -Wcharacter-truncation -Wline-truncation -Wsurprising -Wno-tabs -Wunderflow -ffpe-trap=invalid,zero,overflow -Wunused -Werror'
        ;;
    */f90 | f90)
        # The plain "f90" name is shared by several vendors, so the host
        # operating system decides which flag set applies.
        case "$host_os" in
            Linux)
                # ---- AbSoft (flags only defined for x86 CPUs) ----
                case "$host_cpu" in
                    x86_64 | i*86)
                        DEBUG_FFLAGS=''
                        OPT_FFLAGS='-O3'
                        DEF_FFLAGS='-W132 -s -cpu:p7 -v -YDEALLOC=ALL'
                        ;;
                esac
                ;;
            irix)
                # ---- SGI Irix ----
                DEBUG_FFLAGS='-check_bounds'
                OPT_FFLAGS='-O3'
                DEF_FFLAGS='-ansi -u -64 -OPT:Olimit=0 -OPT:roundoff=3 -OPT:IEEE_arithmetic=3 -r10000 -mips4'
                DEF_MPILIBS='-lmpi -lfastm -lfpe'
                DEF_MPIFC="$FC"
                ;;
            superux*)
                # ---- NEC SX ----
                # The double quotes inside DEF_FFLAGS are literal characters
                # that delimit the argument of -Wf.
                DEBUG_FFLAGS=''
                OPT_FFLAGS=''
                DEF_FFLAGS='-C hopt -R2 -Wf" -L nostdout noinclist mrgmsg noeject -msg b -pvctl loopcnt=14000000 expand=10 fullmsg vecthreshold=20 -s" -pi auto line=100 exp=swap_all,rank'
                DEF_MPIFC="$FC"
                ;;
        esac
        ;;
    */lf95 | lf95)
        # ---- Lahey f90 ----
        DEBUG_FFLAGS='--chk'
        OPT_FFLAGS='-O'
        DEF_FFLAGS='--warn --wo --tpp --f95 --dal'
        ;;
    # ---- IBM ----
    # The MPI wrapper arm must stay ahead of the generic xlf arm: the
    # generic pattern would otherwise capture the mpxlf names as well.
    */mpxlf* | mpxlf*)
        DEF_MPIFC="$FC"
        ;;
    *xlf*)
        # (this pattern also covers names given with a directory prefix)
        #
        # On some (but not all) IBM machines -qsave may be needed; otherwise
        # the compiler puts the arrays on the stack and the code crashes when
        # the stack is too small (sometimes the case, though less often these
        # days on large machines).
        #
        # You will probably need " module load bgq-xl " or similar in your
        # .bash_profile to load the compilers.
        #
        # It can also help to put this in your .bash_profile:
        #   export XLFRTEOPTS=aggressive_array_io=yes:buffering=enable
        #
        # Because the Fortran compiler is run with -q64, the C compiler
        # should be configured with either
        #   CC = xlc_r
        #   CFLAGS = -O3 -q64
        # or
        #   CC = gcc
        #   CFLAGS = -O3 -m64
        #
        # With the IBM xlf90 compiler, when the link step reports
        #   ...relocation truncated to fit: R_PPC_LOCAL24PC...
        # also add
        #   CFLAGS = -Wl,-relax
        # That message only occurs on Linux, not on AIX: "relax" is not an
        # option of the AIX linker and is interpreted there in ways that will
        # result in binaries that are not executable.
        #
        # -qxflag=dvz was removed because it requires the handler function
        # __xl_dzx and linking would therefore fail.
        #
        # -qstrict prevents some minor differences of results between xlf and
        # other compilers. It costs a little performance, but the loss is
        # generally small or negligible compared to other issues such as
        # slow I/O.
        #
        # Options -qreport -qsource -qlist create a *.lst file containing
        # detailed information about vectorization.
        #
        # On IBM BlueGene at IDRIS (France) use
        #   -qtune=auto -qarch=450d -qsave
        # instead of
        #   -qtune=auto -qarch=auto
        DEBUG_FFLAGS='-g -O0 -C -qddim -qfullpath -qflttrap=overflow:zerodivide:invalid:enable -qfloat=nans -qinitauto=7FBFFFFF'
        OPT_FFLAGS='-O4 -qstrict'
        DEF_FFLAGS='-qassert=contig -qhot -q64 -qtune=auto -qarch=auto -qcache=auto -qfree=f90 -qsuffix=f=f90 -qhalt=w -qlanglvl=2003std -qzerosize -g -qsuppress=1518-234 -qsuppress=1518-317 -qsuppress=1518-318 -qsuppress=1500-036 -qsuppress=1515-009'
        ;;
    */pathf90 | pathf90)
        # ---- PathScale ----
        # The C side should be configured with
        #   CC = pathcc
        #   CFLAGS = -O2
        DEBUG_FFLAGS='-g2'
        OPT_FFLAGS='-O3 -OPT:Ofast -LNO:fusion=2 -LNO:simd=2 -LNO:simd_verbose=ON'
        DEF_FFLAGS='-fno-math-errno -ffast-math -msse3 -march=auto -fno-second-underscore -align64'
        ;;
esac

# SGI Irix build machines only: request TRAP_FPE=OFF through FCENV.
# CAUTION: on SGI, always define "setenv TRAP_FPE OFF" before compiling.
# For any other value of $build_os, FCENV is left as it was.
if [ "$build_os" = "irix" ]; then
    FCENV="TRAP_FPE=OFF"
fi

# If you wish, set CFLAGS here instead of on the command-line.
# This will then be persistent across calls to configure.
# If you don't set it, then the default value will be determined by autoconf.
#DEF_CFLAGS="-g -O3"

# Fill in whatever the caller did not provide.
#
# FLAGS_CHECK: when empty, build it from the guessed flags. An empty
# $COND_DEBUG_TRUE selects the debug set, any other value the optimized set.
if [ -z "$FLAGS_CHECK" ]; then
    if [ -n "$COND_DEBUG_TRUE" ]; then
        FLAGS_CHECK="$DEF_FFLAGS $OPT_FFLAGS"
    else
        FLAGS_CHECK="$DEF_FFLAGS $DEBUG_FFLAGS"
    fi
fi

# MPIFC: when empty, fall back to the guessed MPI compiler. MPILIBS gets its
# guessed default only in that same situation, and only if it is empty too.
if [ -z "$MPIFC" ]; then
    MPIFC="$DEF_MPIFC"
    MPILIBS="${MPILIBS:-$DEF_MPILIBS}"
fi

# Print the guessed settings on stdout, one NAME="value" line each.
# Every "$" in a line is rewritten to "\$" by sed, so that a literal dollar
# sign survives when the line is later evaluated by a shell (presumably by
# the configure script -- confirm against the caller).
#
# The values are expanded inside double quotes and written with printf:
# an unquoted expansion handed to echo would undergo pathname expansion
# (e.g. "-I /opt/*") and whitespace collapsing, and echo treats backslashes
# differently from one /bin/sh implementation to another.
printf '%s\n' "MPIFC=\"$MPIFC\"" | sed 's/\$/\\\$/g'
printf '%s\n' "MPILIBS=\"$MPILIBS\"" | sed 's/\$/\\\$/g'
printf '%s\n' "FLAGS_CHECK=\"$FLAGS_CHECK\"" | sed 's/\$/\\\$/g'
printf '%s\n' "FCENV=\"$FCENV\"" | sed 's/\$/\\\$/g'
# CFLAGS is only emitted when DEF_CFLAGS is set in this file and the user
# has not overridden CFLAGS on the command-line. Two separate tests are
# used because the "-a" operator of "[" is obsolescent.
if [ -n "${DEF_CFLAGS+set}" ] && [ -z "${CFLAGS+set}" ]; then
    printf '%s\n' "CFLAGS=\"$DEF_CFLAGS\"" | sed 's/\$/\\\$/g'
fi

# Added by IDRIS: fall back to default archiver settings when AR, RANLIB or
# ARFLAGS is unset or empty.
# The ":=" expansion is used instead of a '[ ... == ... ]' comparison: "=="
# is not a POSIX test operator, and under a strict /bin/sh such as dash the
# comparison fails with an error, so the defaults would never be applied.
: "${AR:=ar}"
: "${RANLIB:=ranlib}"
: "${ARFLAGS:=cru}"

# Added by IDRIS to crosscompile on BlueGene/P: print the archiver settings
# as NAME="value" lines, with every "$" rewritten to "\$" by sed. The values
# are quoted so they are not subject to pathname expansion or whitespace
# collapsing.
printf '%s\n' "AR=\"$AR\"" | sed 's/\$/\\\$/g'
printf '%s\n' "ARFLAGS=\"$ARFLAGS\"" | sed 's/\$/\\\$/g'
printf '%s\n' "RANLIB=\"$RANLIB\"" | sed 's/\$/\\\$/g'

# end of file
